* Model count data from ACLD and HILDA
* ROC curves http://gim.unmc.edu/dxtests/roc3.htm

* Project folders. The paths contain spaces, so the macros are always expanded
* inside double quotes when they are used.
*   working   - inputs and outputs for this analysis (ACLD count data, graphs, log)
*   workhilda - derived HILDA working files (move_long is read from here)
*   hildadir  - HILDA Release 16 distribution files (longitudinal weights are read from here)
local working "H:\Documents\HILDA Project\ARC Methodology\UQ research\Chapter 3 - Mobility\GSS 2014"
local workhilda "H:\Documents\HILDA Project\Data\HILDA Working\Release 16\residential mobility"
local hildadir "H:\Documents\HILDA Project\Data\HILDA Release\Release 16\Stata 160c"

* Close any log left open by an earlier run, then start a fresh one.
* The log lives in the working folder, so its path is built from the macro
* rather than repeating the folder name a second time.
capture log close
log using "`working'\ACLDmodelcountdata10y.log", replace

* Uncomment the next line to make the pause commands further down stop execution.
*pause on
set more off

* restrict dataset to just key variables (to avoid as many zero cells as possible)
* Get HILDA dataset, create counts and append ACLD counts
* First, for people who age into being eligible for interview, include in longitudinal sample with appropriate weight
* Only the person id and the weights wlrb_k ... wlrj_k are read from the weights file.
* (wlrd_k was previously listed twice in this varlist; each weight is now named once.)
use xwaveid wlrb_k wlrc_k wlrd_k wlre_k wlrf_k wlrg_k wlrh_k wlri_k wlrj_k ///
    using "`hildadir'\longitudinal_weights_p160c.dta", clear
* Presumably xwaveid is numeric in move_long, hence the conversion before merging - confirm.
destring xwaveid, replace
* One weights row per person may match many rows in move_long.
* Unmatched rows are kept at this point, exactly as before; the _merge marker is not needed.
merge 1:m xwaveid using "`workhilda'\move_long", nogenerate
* restrict to sample equivalent to ACLD (leave age restriction to later in program)
keep if ageg>=4 & wave==11

* modify longitudinal responding person weight
* lnwtrpr2 starts as a copy of lnwtrp and is then overwritten, age by age,
* for people aged 15 to 24.
tabulate hgage if lnwtrp>0
generate lnwtrpr2 = lnwtrp

* Ages 24 down to 16 take wlrb_k, wlrc_k, ..., wlrj_k in turn
* (one letter later in the alphabet for each year younger).
local age 24
foreach w in b c d e f g h i j {
    replace lnwtrpr2 = wlr`w'_k if hgage==`age'
    local --age
}
* 15 year olds take hhwtrp instead.
replace lnwtrpr2 = hhwtrp if hgage==15

tabulate hgage if lnwtrpr2>0

* Note - this revision to the weights is now done in the setup program and the new weight is lnwtrpr
* The correlation compares the weight rebuilt here with the one supplied by the setup program.
correlate lnwtrpr lnwtrpr2

* calculated effective sample size
* ESS = (sum of weights) squared, divided by (sum of squared weights),
* over observations with a positive weight.
gen lnwtrprsq=lnwtrpr^2
sum lnwtrpr* if lnwtrpr>0
* The two totals are taken from the data in memory instead of being typed in
* from an earlier run (previously 1755.108*9334 and 4430128*9334), so the
* displayed figures stay correct if the sample or the weights change.
tempname sumw sumwsq
quietly summarize lnwtrpr if lnwtrpr>0
scalar `sumw' = r(sum)
quietly summarize lnwtrprsq if lnwtrpr>0
scalar `sumwsq' = r(sum)
* weighted population total
display `sumw'
* effective sample size
display (`sumw')^2/`sumwsq'
* Recorded from the original run: ESS=6490 (9334 respondents, 16,382,178 population)
* NOTE(review): the count rescaling further down still uses the recorded constants
* 6490 and 16382174; compare them with the values displayed here when the data change.

* own (26 missing as DK/Ref)
* 1 when hstenr is 1 or 3, 0 when hstenr is 2 or 4, missing for any other value
generate own = inlist(hstenr, 1, 3) if inlist(hstenr, 1, 2, 3, 4)

* bachelor & above (assume unknown are not bachelor)
* 1 when edhigh1 is in 1-3, 0 when edhigh1 is in 4-10, missing for any other value
generate bach = inrange(edhigh1, 1, 3) if inrange(edhigh1, 1, 3) | inrange(edhigh1, 4, 10)

* create count dataset and include variables: agegrp own bach move10y count
* Each person contributes 1, so the weighted sum per cell is the weighted head count.
generate count = 1
summarize count if lnwtrpr>0
* count=9334

* NOTE(review): pmove is presumably an abbreviation of pmove10 - confirm it is unambiguous.
summarize ageg own bach pmove lnwtrpr if lnwtrpr>0
* One row per ageg x own x bach x pmove10 cell, holding the weighted total of count.
collapse (sum) count if lnwtrpr>=0 [pweight=lnwtrpr], by(ageg own bach pmove10)
* revise count to equivalent sample size
* (6490 and 16382174 are constants typed in from an earlier run)
replace count = count*6490/16382174
summarize count
* display 173*37.51445 gives 6490
* Keep only cells where all three classifiers are known.
keep if own!=. & bach!=. & pmove10!=.
* Names used by the rest of the program (and by the ACLD count file appended later).
rename pmove10 move10y
rename ageg agegrp

* flag dataset (1=HILDA, 2= GSS, 3=ACLD)
* Every row in memory at this point comes from HILDA.
generate ds = 1
label define d 1 "HILDA" 2 "GSS" 3 "ACLD"
label values ds d
label variable ds "Source dataset"

* Add the ACLD cell counts; rows whose ds is missing after the append are flagged as ACLD.
append using "`working'\ACLD2006_2011countdata_move10y.dta"
replace ds = 3 if ds==.
tabulate ds
summarize count if ds==3
* count=120*119652.9=14,358,348
* ACLD count is lower than HILDA equivalent age as ACLD excludes:
* i) people usually resident overseas in 2011 (note the weights should adjust for people temporarily away on Census Nights 2006 and 2011 as they use PES and ERP adjustments)
* ii) people living in institutions (as tenure not asked for these)
* ACLD includes:
* i) people living in very remote Australia in 2011
* HILDA includes:
* i) people who have moved into institutions by 2011
* ii) people who have moved into very remote Australia by 2011
* HILDA excludes:
* i) people who were living overseas in last 9 years (otherwise pmove10 could not be calculated?)
* ii) people living in NPD and very remote areas in 2001 (Census would include)

* revise count to equivalent sample size
* ACLD is 5% sample
replace count = count*0.05 if ds==3
summarize count if ds==3
* count=120*5982.647=717,918

* Whole-number cell counts, needed because the models below use frequency weights.
capture drop approxcount
generate approxcount = round(count, 1)

* summary of observed data
* Split each cell count into a movers column and a non-movers column ...
generate approxcount_move = cond(move10y==1, approxcount, 0)
generate approxcount_nomove = cond(move10y==0, approxcount, 0)
* ... total them within dataset x age group x tenure ...
bysort ds agegrp own: egen summove = sum(approxcount_move)
bysort ds agegrp own: egen sumnomove = sum(approxcount_nomove)
* ... and form the observed proportion that moved.
generate move10yobs = summove/(summove+sumnomove)

* model cubic splines
* bspline is called with power 3 and knots 4 6 10 18 on agegrp; the model below
* uses the resulting basis variables bs1 to bs6.
bspline, xvar(agegrp) knots(4 6 10 18) gen(bs) power(3)

* Tenure-specific spline terms: own#c.bs1 ... own#c.bs6
local ownspline
forvalues j = 1/6 {
    local ownspline `ownspline' own#c.bs`j'
}

* HILDA only (ds==1), cells weighted by their rounded counts
xi: logit move10y `ownspline' [fweight=approxcount] if ds==1
estimates store m3
lroc
estat ic
* pause only stops here when pause has been switched on near the top of the file
pause

* Fitted versus observed 10-year move proportions for HILDA, by tenure.
* Everything between preserve and restore works on a HILDA-only copy, so the
* prediction variables created here disappear again at restore.
preserve
keep if ds==1
predict move10yhat
predict stdp, stdp
* Standard error on the probability scale: p(1-p) times the standard error of
* the linear prediction; 95% bounds with the lower one floored at 0.
generate se = move10yhat*(1-move10yhat)*stdp
generate lb = move10yhat - 1.96*se
generate ub = move10yhat + 1.96*se
replace lb = 0 if lb<0
* The shaded interval layers (rarea lb ub agegrp, one per own value, sort fintensity(10))
* are switched off; only the four lines are drawn.
twoway (line move10yhat agegrp if own==0, sort) ///
       (line move10yhat agegrp if own==1, sort) ///
       (line move10yobs agegrp if own==0, sort) ///
       (line move10yobs agegrp if own==1, sort), ///
       xlabel(6 (3) 18) xtick(6 (3) 18) ylabel(0 (0.1) 1) ytick(0 (0.1) 1) ///
       legend(label(1 "model-not own") label(2 "model-own") label(3 "obs-not own") label(4 "obs-own")) ///
       ytitle("Proportion moving in the last 10 years") xtitle("Age")
graph save "`working'\HILDAmove10y3_own", replace
restore
pause

* Same tenure-by-spline model, fitted to the ACLD cells only (ds==3).
local ownspline
forvalues j = 1/6 {
    local ownspline `ownspline' own#c.bs`j'
}
xi: logit move10y `ownspline' [fweight=approxcount] if ds==3
* NOTE(review): this reuses the name m3, so the HILDA fit stored under m3 earlier is replaced.
estimates store m3
lroc
estat ic
pause

* Fitted versus observed 10-year move proportions for ACLD, by tenure.
* Everything between preserve and restore works on an ACLD-only copy, so the
* prediction variables created here disappear again at restore.
preserve
keep if ds==3
predict move10yhat
predict stdp, stdp
* Standard error on the probability scale: p(1-p) times the standard error of
* the linear prediction; 95% bounds with the lower one floored at 0.
generate se = move10yhat*(1-move10yhat)*stdp
generate lb = move10yhat - 1.96*se
generate ub = move10yhat + 1.96*se
replace lb = 0 if lb<0
* The shaded interval layers (rarea lb ub agegrp, one per own value, sort fintensity(10))
* are switched off; only the four lines are drawn.
twoway (line move10yhat agegrp if own==0, sort) ///
       (line move10yhat agegrp if own==1, sort) ///
       (line move10yobs agegrp if own==0, sort) ///
       (line move10yobs agegrp if own==1, sort), ///
       xlabel(6 (3) 18) xtick(6 (3) 18) ylabel(0 (0.1) 1) ytick(0 (0.1) 1) ///
       legend(label(1 "model-not own") label(2 "model-own") label(3 "obs-not own") label(4 "obs-own")) ///
       ytitle("Proportion moving in the last 10 years") xtitle("Age")
graph save "`working'\ACLDmove10y_own", replace
restore
pause

* Pooled HILDA + ACLD models.
* The models contain the age splines, own, bach and the dataset flag; there is no sex term.
* Shared term lists:
*   dsown     - ds#own#c.bs1 ... ds#own#c.bs6
*   dsownbach - ds#own#bach#c.bs1 ... ds#own#bach#c.bs6
*   ownspline - own#c.bs1 ... own#c.bs6
local dsown
local dsownbach
local ownspline
forvalues j = 1/6 {
    local dsown `dsown' ds#own#c.bs`j'
    local dsownbach `dsownbach' ds#own#bach#c.bs`j'
    local ownspline `ownspline' own#c.bs`j'
}

* effect of bach within each dataset, after adjusting for age group (splines) by dataset and tenure
xi: logit move10y `dsown' ds#bach [fweight=approxcount]
lroc
estat ic
*pause

* disabled alternative: bach-specific age splines within each dataset
/*
xi:logit move10y ds#own#c.bs1 ds#own#c.bs2 ds#own#c.bs3 ds#own#c.bs4 ds#own#c.bs5 ds#own#c.bs6 ds#bach#c.bs1 ds#bach#c.bs2 ds#bach#c.bs3 ds#bach#c.bs4 ds#bach#c.bs5 ds#bach#c.bs6 [fweight=approxcount]
lroc
estat ic
*/
* age splines that differ by dataset, tenure and bach
xi: logit move10y `dsown' `dsownbach' [fweight=approxcount]
lroc
estat ic

* same specification as the first pooled model in this section, fitted again
xi: logit move10y `dsown' ds#bach [fweight=approxcount]
lroc
estat ic

* HILDA-only fit with a tenure-by-bach term (no diagnostics are run on it)
xi: logit move10y `ownspline' own#bach [fweight=approxcount] if ds==1
* Pooled fit with a dataset-by-tenure-by-bach term. This is the estimation that is
* active for the tests below and for any predict issued afterwards.
xi: logit move10y `dsown' ds#own#bach [fweight=approxcount]
lroc
estat ic

test [move10y]1b.ds#0b.own#1.bach=[move10y]3.ds#0b.own#1.bach
* No significant difference between datasets for not owning when have bachelor

test [move10y]1b.ds#0b.own#0b.bach=[move10y]3.ds#0b.own#0b.bach
* Significant difference between datasets for not owning when don't have bachelor

test [move10y]1b.ds#1.own#0b.bach = [move10y]3.ds#1.own#0b.bach
* No significant difference between datasets for owning when don't have bachelor

test [move10y]1b.ds#1.own#1.bach = [move10y]3.ds#1.own#1.bach
* Constraint dropped (no significant difference between datasets for owning when have bachelor)

* Rebuild the whole-number cell counts (same definition as before).
capture drop approxcount
gen approxcount=round(count,1)

* summary of observed data, this time split by bach as well as tenure
* The earlier versions of these helper variables are removed first.
drop approxcount_move approxcount_nomove summove sumnomove move10yobs
gen approxcount_move=0
gen approxcount_nomove=0
replace approxcount_move=approxcount if move10y==1
replace approxcount_nomove=approxcount if move10y==0
* ds is part of the grouping, as in the first observed summary. Without it the
* HILDA and ACLD cells were added together, so move10yobs was a pooled
* proportion rather than one per dataset.
bys ds agegrp own bach: egen summove=sum(approxcount_move)
bys ds agegrp own bach: egen sumnomove=sum(approxcount_nomove)
gen move10yobs=summove/(summove+sumnomove)

*drop move10yhat stdp se lb ub
* Fitted probabilities from the most recent estimation (the pooled model with the
* ds#own#bach term, fitted just before the tests), for the rows of both datasets.
predict move10yhat
predict stdp, stdp
* Standard error on the probability scale: p(1-p) times the standard error of
* the linear prediction, then symmetric 95% bounds.
gen se = move10yhat * (1-move10yhat) * stdp
gen lb = move10yhat - 1.96*se
gen ub = move10yhat + 1.96*se
* A symmetric interval around a probability can leave the 0-1 range, so both ends
* are clamped (previously only the lower bound was). The missing() guard is needed
* because Stata treats a missing value as larger than any number.
replace lb = 0 if lb<0
replace ub = 1 if ub>1 & !missing(ub)

* HILDA figure, colour version. The data are restricted to HILDA until the next restore.
* Plot order, which the legend label() and order() numbers refer to:
*    1  fitted, not own, bach==0        2  fitted, own, bach==0
*    3  lower,  not own, bach==0        4  upper,  not own, bach==0
*    5  lower,  own, bach==0            6  upper,  own, bach==0
*    7  fitted, not own, bach==1        8  fitted, own, bach==1
*    9  lower,  not own, bach==1       10  upper,  not own, bach==1
*   11  lower,  own, bach==1           12  upper,  own, bach==1
* Series 6 is an upper bound; its legend text previously read CI LB.
preserve
keep if ds==1
graph twoway line move10yhat agegrp if own==0 & bach==0, sort lcolor(midblue) ///
          || line move10yhat agegrp if own==1 & bach==0, sort lcolor(cranberry) ///
          || line lb agegrp if own==0 & bach==0, sort lcolor(midblue) lpattern(shortdash) lwidth(thin) ///
          || line ub agegrp if own==0 & bach==0, sort lcolor(midblue) lpattern(shortdash) lwidth(thin) ///
          || line lb agegrp if own==1 & bach==0, sort lcolor(cranberry) lpattern(shortdash) lwidth(thin) ///
          || line ub agegrp if own==1 & bach==0, sort lcolor(cranberry) lpattern(shortdash) lwidth(thin) ///
          || line move10yhat agegrp if own==0 & bach==1, sort lcolor(midgreen) ///
          || line move10yhat agegrp if own==1 & bach==1, sort lcolor(gold) ///
          || line lb agegrp if own==0 & bach==1, sort lpattern(shortdash) lcolor(midgreen) lwidth(thin) ///
          || line ub agegrp if own==0 & bach==1, sort lpattern(shortdash) lcolor(midgreen) lwidth(thin) ///
          || line lb agegrp if own==1 & bach==1, sort lpattern(shortdash) lcolor(gold) lwidth(thin) ///
          || line ub agegrp if own==1 & bach==1, sort lpattern(shortdash) lcolor(gold) lwidth(thin) ///
          xlabel(4 "15-19" /* 5 "20-24" */ 6 "25-29" /*7 "30-34"*/ 8 "35-39" /*9 "40-44"*/ 10 "45-49" /*11 "50-54"*/ 12 "55-59" /*13 "60-64"*/ 14 "65-69" /*15 "70-74"*/ 16 "75-79" /*17 "80-84"*/ 18 "85+") ///
          xtick(4 (2) 18) ylabel(0 (0.1) 1) ytick(0 (0.1) 1) ///
          legend(label(1 "Not own, not bach") label(2 "Own, not bach") label(3 "CI LB") label(4 "CI UB") label(5 "CI LB") label(6 "CI UB") ///
                 label(7 "Not own, bach") label(8 "Own, bach") label(9 "CI LB") label(10 "CI UB") label(11 "CI LB") label(12 "CI UB") ///
                 order(1 2 7 8 3 5 9 11 4 6 10 12) size(2) col(4)) ///
          graphregion(color(white)) ///
          ytitle("Proportion moving in the last 10 years") xtitle("Age")  title("HILDA 2011") name(hilda, replace)
graph save "`working'\HILDAmove10y_mown", replace
* HILDA figure, greyscale version: same 12 series in the same order as the
* colour version drawn just before it, so the legend label() and order()
* numbers are unchanged.
* Fitted lines use gs0, gs4, gs8 and gs12 (the two own==1 lines are dash_dot);
* every lower and upper bound is dotted in the colour of its fitted line.
* The graph is held in memory as hilda_gs and saved to the working folder.
* The restore at the end undoes the keep if ds==1 issued before the colour version.
graph twoway line move10yhat agegrp if own==0 & bach==0, sort lcolor(gs0) ///
          || line move10yhat agegrp if own==1 & bach==0, sort lcolor(gs4) lpattern(dash_dot) ///
		  || line lb agegrp if own==0 & bach==0, sort lcolor(gs0) lpattern(dot) /*lwidth(thin)*/ ///
		  || line ub agegrp if own==0 & bach==0, sort lcolor(gs0) lpattern(dot) /*lwidth(thin)*/ ///
		  || line lb agegrp if own==1 & bach==0, sort lcolor(gs4) lpattern(dot) /*lwidth(thin)*/ ///
		  || line ub agegrp if own==1 & bach==0, sort lcolor(gs4) lpattern(dot) /*lwidth(thin)*/ ///
		  || line move10yhat agegrp if own==0 & bach==1, sort lcolor(gs8) ///
          || line move10yhat agegrp if own==1 & bach==1, sort lcolor(gs12) lpattern(dash_dot) ///
		  || line lb agegrp if own==0 & bach==1, sort lpattern(dot) lcolor(gs8) /*lwidth(thin)*/ ///
		  || line ub agegrp if own==0 & bach==1, sort lpattern(dot) lcolor(gs8) /*lwidth(thin)*/ ///
		  || line lb agegrp if own==1 & bach==1, sort lpattern(dot) lcolor(gs12) /*lwidth(thin)*/ ///
		  || line ub agegrp if own==1 & bach==1, sort lpattern(dot) lcolor(gs12) /*lwidth(thin)*/ ///
		  xlabel(4 "15-19" /* 5 "20-24" */ 6 "25-29" /*7 "30-34"*/ 8 "35-39" /*9 "40-44"*/ 10 "45-49" /*11 "50-54"*/ 12 "55-59" /*13 "60-64"*/ 14 "65-69" /*15 "70-74"*/ 16 "75-79" /*17 "80-84"*/ 18 "85+") ///
          xtick(4 (2) 18) ylabel(0 (0.1) 1) ytick(0 (0.1) 1) ///
		  legend(label(1 "Not own, not bach") label(2 "Own, not bach") label(3 "CI LB") label(4 "CI UB") label(5 "CI LB") label(6 "CI UB") ///
		         label(7 "Not own, bach") label(8 "Own, bach") label(9 "CI LB") label(10 "CI UB") label(11 "CI LB") label(12 "CI UB")   ///
				 order(1 2 7 8 3 5 9 11 4 6 10 12) size(2) col(4))   ///
	      graphregion(color(white)) ///
		  ytitle("Proportion moving in the last 10 years") xtitle("Age")  title("HILDA 2011") name(hilda_gs, replace)  
graph save "`working'\HILDAmove10y_mown_gs", replace
restore
* ACLD figure, colour version.
* The data are restricted to the ACLD rows (ds==3) until the next restore.
* It plots move10yhat, lb and ub, which must already exist in memory at this point.
* Plot order, which the legend label() and order() numbers refer to:
*    1-2   fitted line for bach==0 (not own, then own)
*    3-4   lower and upper bound, not own, bach==0
*    5-6   lower and upper bound, own, bach==0
*    7-8   fitted line for bach==1 (not own, then own)
*    9-10  lower and upper bound, not own, bach==1
*   11-12  lower and upper bound, own, bach==1
* Only every second age group is labelled on the x axis; the skipped labels are
* kept inside the xlabel() option as inline comments.
* The graph is held in memory as acld and saved to the working folder.
preserve
keep if ds==3
graph twoway line move10yhat agegrp if own==0 & bach==0, sort lcolor(midblue) ///
          || line move10yhat agegrp if own==1 & bach==0, sort lcolor(cranberry) ///
		  || line lb agegrp if own==0 & bach==0, sort lcolor(midblue) lpattern(shortdash) lwidth(thin) ///
		  || line ub agegrp if own==0 & bach==0, sort lcolor(midblue) lpattern(shortdash) lwidth(thin) ///
		  || line lb agegrp if own==1 & bach==0, sort lcolor(cranberry) lpattern(shortdash) lwidth(thin) ///
		  || line ub agegrp if own==1 & bach==0, sort lcolor(cranberry) lpattern(shortdash) lwidth(thin) ///
		  || line move10yhat agegrp if own==0 & bach==1, sort lcolor(midgreen) ///
          || line move10yhat agegrp if own==1 & bach==1, sort lcolor(gold) ///
		  || line lb agegrp if own==0 & bach==1, sort lpattern(shortdash) lcolor(midgreen) lwidth(thin) ///
		  || line ub agegrp if own==0 & bach==1, sort lpattern(shortdash) lcolor(midgreen) lwidth(thin) ///
		  || line lb agegrp if own==1 & bach==1, sort lpattern(shortdash) lcolor(gold) lwidth(thin) ///
		  || line ub agegrp if own==1 & bach==1, sort lpattern(shortdash) lcolor(gold) lwidth(thin) ///
		  xlabel(4 "15-19" /* 5 "20-24" */ 6 "25-29" /*7 "30-34"*/ 8 "35-39" /*9 "40-44"*/ 10 "45-49" /*11 "50-54"*/ 12 "55-59" /*13 "60-64"*/ 14 "65-69" /*15 "70-74"*/ 16 "75-79" /*17 "80-84"*/ 18 "85+") ///
          xtick(4 (2) 18) ylabel(0 (0.1) 1) ytick(0 (0.1) 1) ///
		  legend(label(1 "Not own, not bach") label(2 "Own, not bach") label(3 "CI LB") label(4 "CI UB") label(5 "CI LB") label(6 "CI UB") ///
		         label(7 "Not own, bach") label(8 "Own, bach") label(9 "CI LB") label(10 "CI UB") label(11 "CI LB") label(12 "CI UB")   ///
				 order(1 2 7 8 3 5 9 11 4 6 10 12) size(2) col(4))   ///
	      graphregion(color(white)) ///
		  ytitle("Proportion moving in the last 10 years") xtitle("Age")  title("ACLD 2006-2011") name(acld, replace) 
graph save "`working'\ACLDmove10y_mown", replace
* ACLD figure, greyscale version: same 12 series in the same order as the
* colour version drawn just before it, so the legend label() and order()
* numbers are unchanged.
* Fitted lines use gs0, gs4, gs8 and gs12 (the two own==1 lines are dash_dot);
* every lower and upper bound is dotted in the colour of its fitted line.
* The graph is held in memory as acld_gs and saved to the working folder.
* The restore at the end undoes the keep if ds==3 issued before the colour version.
graph twoway line move10yhat agegrp if own==0 & bach==0, sort lcolor(gs0) ///
          || line move10yhat agegrp if own==1 & bach==0, sort lcolor(gs4) lpattern(dash_dot) ///
		  || line lb agegrp if own==0 & bach==0, sort lcolor(gs0) lpattern(dot) /*lwidth(thin)*/ ///
		  || line ub agegrp if own==0 & bach==0, sort lcolor(gs0) lpattern(dot) /*lwidth(thin)*/ ///
		  || line lb agegrp if own==1 & bach==0, sort lcolor(gs4) lpattern(dot) /*lwidth(thin)*/ ///
		  || line ub agegrp if own==1 & bach==0, sort lcolor(gs4) lpattern(dot) /*lwidth(thin)*/ ///
		  || line move10yhat agegrp if own==0 & bach==1, sort lcolor(gs8) ///
          || line move10yhat agegrp if own==1 & bach==1, sort lcolor(gs12) lpattern(dash_dot) ///
		  || line lb agegrp if own==0 & bach==1, sort lpattern(dot) lcolor(gs8) /*lwidth(thin)*/ ///
		  || line ub agegrp if own==0 & bach==1, sort lpattern(dot) lcolor(gs8) /*lwidth(thin)*/ ///
		  || line lb agegrp if own==1 & bach==1, sort lpattern(dot) lcolor(gs12) /*lwidth(thin)*/ ///
		  || line ub agegrp if own==1 & bach==1, sort lpattern(dot) lcolor(gs12) /*lwidth(thin)*/ ///
		  xlabel(4 "15-19" /* 5 "20-24" */ 6 "25-29" /*7 "30-34"*/ 8 "35-39" /*9 "40-44"*/ 10 "45-49" /*11 "50-54"*/ 12 "55-59" /*13 "60-64"*/ 14 "65-69" /*15 "70-74"*/ 16 "75-79" /*17 "80-84"*/ 18 "85+") ///
          xtick(4 (2) 18) ylabel(0 (0.1) 1) ytick(0 (0.1) 1) ///
		  legend(label(1 "Not own, not bach") label(2 "Own, not bach") label(3 "CI LB") label(4 "CI UB") label(5 "CI LB") label(6 "CI UB") ///
		         label(7 "Not own, bach") label(8 "Own, bach") label(9 "CI LB") label(10 "CI UB") label(11 "CI LB") label(12 "CI UB")   ///
				 order(1 2 7 8 3 5 9 11 4 6 10 12) size(2) col(4))   ///
	      graphregion(color(white)) ///
		  ytitle("Proportion moving in the last 10 years") xtitle("Age")  title("ACLD 2006-2011") name(acld_gs, replace) 
graph save "`working'\ACLDmove10y_mown_gs", replace
restore
* Combined figures: the ACLD and HILDA graphs held in memory are put into one
* figure with grc1leg (not an official Stata command, so it must be installed),
* redisplayed at 20 x 10 and exported as both png and svg.

* colour version
grc1leg acld hilda, /*title("Ten year residential mobility")*/ name(acld_hilda, replace) graphregion(fcolor(white))
graph display, xsize(20) ysize(10)
foreach ext in png svg {
    graph export "`working'\HILDA_ACLDmove10y_mown.`ext'", replace
}

* greyscale version
grc1leg acld_gs hilda_gs, /*title("Ten year residential mobility")*/ name(acld_hilda_gs, replace) graphregion(fcolor(white))
graph display, xsize(20) ysize(10)
foreach ext in png svg {
    graph export "`working'\HILDA_ACLDmove10y_mown_gs.`ext'", replace
}


log close
